Code
library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(MultiAssayExperiment)
tmp <- fs::dir_map("R scripts/", source)
theme_set(theme_light())library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(MultiAssayExperiment)
tmp <- fs::dir_map("R scripts/", source)
theme_set(theme_light())MultiAssayExperiment objectdata_source <- "real"
mae_files <-
fs::dir_ls(
str_c(
get_data_dir(data_source = data_source),
"02 MAEs/"
),
regexp = "mae_full_.*\\.rds$"
) |>
sort(decreasing = TRUE) |>
magrittr::extract(1)
mae <- readRDS(mae_files)
rm(mae_files)tmp <-
dplyr::inner_join(
mae[["mg"]] |>
as_tibble() |>
filter(!is.na(LBP)) |>
filter(!is.na(rel_abs_bact)) |>
select(.feature, .sample, rel_abs_bact, sample_type) |>
dplyr::rename(
mg_rel_ab_bact = rel_abs_bact
),
mae[["qPCR"]] |>
as_tibble() |>
filter(!is.na(LBP)) |>
select(.feature, .sample, copies_per_swab_med, copies_per_swab_cv, strain_group_qpcr),
by = join_by(.feature, .sample)
)
tmp |>
ggplot() +
aes(x = mg_rel_ab_bact, y = copies_per_swab_med, color = sample_type) +
geom_point(alpha = 0.3, size = 1) +
facet_wrap(~ strain_group_qpcr + .feature) +
scale_y_log10() +
scale_x_log10()tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 0) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 0`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 1e+05) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 1e+05`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 10000) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 10000`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) TODO sensitivity-specificity curves for each LBP
TODO
weekly_samples_from_randomized_participants <-
dplyr::inner_join(
tibble(uid = mae[["mg"]]$uid),
tibble(uid = mae[["amplicon"]]$uid),
by = join_by(uid)
) |>
dplyr::inner_join(
mae@colData |> as.data.frame() |> as_tibble() |>
filter(randomized) |>
select(uid, pid, visit_code, randomized, arm, visit_type),
by = join_by(uid)
) |>
filter(visit_type == "Clinic")
mae_sub <- mae[, weekly_samples_from_randomized_participants$uid]random_pids <- mae_sub$pid |> table() |> sort(decreasing = TRUE) |> names() |> head(10)map(
random_pids,
plot_data_for_pid,
mae_sub = mae_sub
)[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
all_samples_from_randomized_participants <-
dplyr::full_join(
tibble(uid = mae[["mg"]]$uid),
tibble(uid = mae[["amplicon"]]$uid),
by = join_by(uid)
) |>
dplyr::inner_join(
mae@colData |> as.data.frame() |> as_tibble() |>
filter(randomized) |>
select(uid, pid, visit_code, randomized, arm),
by = join_by(uid)
)
mae_sub <- mae[, all_samples_from_randomized_participants$uid]# random_pids <- mae_sub$pid |> table() |> sort(decreasing = TRUE) |> names() |> head(10)map(
random_pids,
plot_data_for_pid,
mae_sub = mae_sub
)[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
plot_data_for_pid(selected_pid = "068100046", mae_sub = mae_sub)We rework this graph for all pids and save it as a png.
plot_path <- str_c(
get_data_dir(data_source = data_source),
"03 QCed MAEs/profiles_plot/")
all_pids <- unique(all_samples_from_randomized_participants$pid)
for (pid in all_pids) {
p <- plot_data_for_pid(selected_pid = pid, mae_sub = mae_sub)
file_name <- paste0("plot_", pid, ".png")
ggsave(
filename = str_c(plot_path, file_name),
plot = p,
width = 15, height = 8, dpi = 300
)
}“Guessing the arms” based on the MG and qPCR data
“LC-115” if at any visit, two-three LBP115-specific strains are detected >0
“LC-106-o” if in group 4
“LC-106-3 or -7” if at any visit, two LBP106-specific strains are detected >0
otherwise -> placebo
TODO
The primary outcome is “colonization with any of the L. crispatus strains contained in the LBP by 5 weeks of follow-up as assessed by metagenomic sequencing of the vaginal microbial community with detection of any one of LBP strains at >5% relative abundance or a combination of the strains accounting for >10% relative abundance by metagenomics.”
We compute, at each visit and for each participant, the total relative abundance of all LBP strains or the maximum relative abundance of any LBP strain.
If the total relative abundance of all LBP strains is larger than 0.1 or the maximum relative abundance of any LBP strain is larger than 0.05, we consider that the LBP achieved colonization_mg at that visit.
colonization_LBP_mg <-
mae[["mg"]] |>
as_tibble() |>
filter(sample_type == "Clinical sample") |>
filter(!is.na(LBP)) |>
group_by(.sample) |>
summarize(
n = n(),
total_LBP_rel_ab_at = sum(rel_abs_bact),
max_LBP_rel_ab_at = max(rel_abs_bact),
n_detected_strains_at = sum(rel_abs_bact > 0),
n_detected_LC106_strains_at = sum(rel_abs_bact[LBP == "LC-106 & LC-115"] > 0),
n_detected_LC115_only_strains_at = sum(rel_abs_bact[LBP == "LC-115"] > 0)
) |>
mutate(
colonized_LBP_at_mg = (total_LBP_rel_ab_at > 0.1) | (max_LBP_rel_ab_at > 0.05)
) colonization_LBP_mg <-
colonization_LBP_mg |>
dplyr::left_join(
mae@colData |> as_tibble() |>
select(uid, pid, visit_code, randomized, arm, location) |>
dplyr::rename(.sample = uid),
by = join_by(.sample)
) |>
arrange(pid, visit_code) colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)From the colonization_mg status at each visit, we can compute the colonization_mg status by each visit.
A participant is considered to have colonized by a visit if they have been colonized at that visit or any previous visit, starting from their post-product visit (“1200” for all groups).
Since we have some missing visits, we impute these as “not colonized”
colonization_LBP_mg <-
colonization_LBP_mg |>
select(-c(arm, randomized, location)) |>
dplyr::full_join(
colonization_LBP_mg |> select(pid, arm, randomized, location) |> distinct() |>
expand_grid(
visit_code = unique(colonization_LBP_mg$visit_code) |> sort()
),
by = join_by(pid, visit_code)
)colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)colonization_LBP_mg <-
colonization_LBP_mg |>
arrange(pid, visit_code) |>
mutate(colonized_LBP_at_mg = colonized_LBP_at_mg |> replace_na(FALSE)) |>
group_by(pid) |>
mutate(
tmp = ifelse(as.numeric(visit_code) < 1200, FALSE, colonized_LBP_at_mg),
colonized_LBP_by_mg = cummax(tmp) |> as.logical()
) |>
ungroup() |>
select(-tmp)colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_by_mg) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)We compute, at each visit and for each participant, the total relative abundance of all LBP strains or the maximum relative abundance of any LBP strain.
If the total relative abundance of all LBP strains is larger than 0.1 or the maximum relative abundance of any LBP strain is larger than 0.05, we consider that the LBP achieved colonization_mg at that visit.
colonization_crispatus_mg <-
mae[["mg"]] |>
as_tibble() |>
filter(sample_type == "Clinical sample") |>
filter(species == "crispatus") |>
group_by(.sample) |>
summarize(
total_crispatus_rel_ab_at = sum(rel_abs_bact),
#max_crispatus_rel_ab_at = max(rel_abs_bact),
#n_detected_crispatus_at = sum(rel_abs_bact > 0)
) |>
mutate(
crispatus_dominance_at_mg = (total_crispatus_rel_ab_at > 0.5)
) colonization_crispatus_mg <-
colonization_crispatus_mg |>
dplyr::left_join(
mae@colData |> as_tibble() |>
select(uid, pid, visit_code, randomized, arm, location) |>
dplyr::rename(.sample = uid),
by = join_by(.sample)
) |>
arrange(pid, visit_code) colonization_crispatus_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = crispatus_dominance_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)From the colonization_mg status at each visit, we can compute the colonization_mg status by each visit.
A participant is considered to have colonized by a visit if they have been colonized at that visit or any previous visit, starting from their post-product visit (“1200” for all groups).
Since we have some missing visits, we impute these as “not colonized”
colonization_crispatus_mg <-
colonization_crispatus_mg |>
select(-c(arm, randomized, location)) |>
dplyr::full_join(
colonization_crispatus_mg |> select(pid, arm, randomized, location) |> distinct() |>
expand_grid(
visit_code = unique(colonization_crispatus_mg$visit_code) |> sort()
),
by = join_by(pid, visit_code)
)colonization_crispatus_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = crispatus_dominance_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)colonization_crispatus_mg <-
colonization_crispatus_mg |>
arrange(pid, visit_code) |>
mutate(crispatus_dominance_at_mg = crispatus_dominance_at_mg |> replace_na(FALSE)) |>
group_by(pid) |>
mutate(
#tmp = ifelse(as.numeric(visit_code) < 1200, FALSE, crispatus_dominance_at_mg),
crispatus_dominance_by_mg = cummax(crispatus_dominance_at_mg) |> as.logical()
) |>
ungroup() #|>
#select(-tmp)colonization_crispatus_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = crispatus_dominance_by_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)p1 <- colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p2 <- colonization_crispatus_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = crispatus_dominance_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p1 + p2 +
plot_annotation(
title = str_c("LBP colonization VS crispatus dominance")
) +
plot_layout(ncol = 2) &
theme(
legend.justification = "left"
)p1 <- colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_by_mg) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p2 <- colonization_crispatus_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = crispatus_dominance_by_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p1 + p2 +
plot_annotation(
title = str_c("LBP colonization VS crispatus dominance")
) +
plot_layout(ncol = 2) &
theme(
legend.justification = "left"
)We compute, at each visit and for each participant, the total relative abundance of all LBP strains or the maximum relative abundance of any LBP strain.
If the total relative abundance of all LBP strains is larger than 0.1 or the maximum relative abundance of any LBP strain is larger than 0.05, we consider that the LBP achieved colonization at that visit.
colonization_qPCR <-
mae[["qPCR"]] |>
as_tibble() |>
filter(sample_type == "Clinical sample") |>
filter(!is.na(LBP)) |>
group_by(.sample) |>
summarize(
n = n(),
total_load_at = sum(copies_per_swab_med),
n_detected_strains_at = sum(copies_per_swab_med > 0),
n_detected_LC106_strains_at = sum(copies_per_swab_med[LBP == "LC-106 & LC-115"] > 0),
n_detected_LC115_only_strains_at = sum(copies_per_swab_med[LBP == "LC-115"] > 0)
) |>
mutate(
detected_at_qpcr = total_load_at > 0
) colonization_qPCR <-
colonization_qPCR |>
dplyr::left_join(
mae@colData |> as_tibble() |>
select(uid, pid, visit_code, randomized, arm, location) |>
dplyr::rename(.sample = uid),
by = join_by(.sample)
) |>
arrange(pid, visit_code) colonization_qPCR |>
ggplot() +
aes(x = visit_code, y = pid, fill = detected_at_qpcr) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)From the colonization status at each visit, we can compute the colonization status by each visit.
A participant is considered to have colonized by a visit if they have been colonized at that visit or any previous visit, starting from their post-product visit (“1200” for all groups).
Since we have some missing visits, we impute these as “not colonized”
colonization_qPCR <-
colonization_qPCR |>
select(-c(arm, randomized, location)) |>
dplyr::full_join(
colonization_qPCR |> select(pid, arm, randomized, location) |> distinct() |>
expand_grid(
visit_code = unique(colonization_qPCR$visit_code) |> sort()
),
by = join_by(pid, visit_code)
)colonization_qPCR |>
ggplot() +
aes(x = visit_code, y = pid, fill = detected_at_qpcr) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)colonization_qPCR <-
colonization_qPCR |>
arrange(pid, visit_code) |>
mutate(colonized_at_qpcr = detected_at_qpcr |> replace_na(FALSE)) |>
group_by(pid) |>
mutate(
tmp = ifelse(as.numeric(visit_code) < 1200, FALSE, colonized_at_qpcr),
detected_by_qpcr = cummax(tmp) |> as.logical()
) |>
ungroup() |>
select(-tmp)colonization_qPCR |>
ggplot() +
aes(x = visit_code, y = pid, fill = detected_by_qpcr) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)p1 <- colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_at_mg) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p2 <- colonization_qPCR |>
ggplot() +
aes(x = visit_code, y = pid, fill = detected_by_qpcr) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p1 + p2 +
plot_annotation(
title = str_c("LBP colonization at metagenomics VS qPCR")
) +
plot_layout(ncol = 2) &
theme(
legend.justification = "left"
)p1 <- colonization_LBP_mg |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_LBP_by_mg) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p2 <- colonization_qPCR |>
ggplot() +
aes(x = visit_code, y = pid, fill = detected_by_qpcr) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)
p1 + p2 +
plot_annotation(
title = str_c("LBP colonization by metagenomics VS qPCR")
) +
plot_layout(ncol = 2) &
theme(
legend.justification = "left"
)primary_outcomes assay to MAEse_primary_col_LBP_mg <-
SummarizedExperiment(
assays = list(
outcome =
colonization_LBP_mg |>
# filter(!is.na(pid)) |>
# mutate(.sample = str_c(pid, "_", visit_code)) |>
filter(!is.na(.sample)) |>
select(.sample, colonized_LBP_at_mg, colonized_LBP_by_mg) |>
as.data.frame() |>
column_to_rownames(".sample") |> t()
)
)
se_primary_col_crisp_mg <-
SummarizedExperiment(
assays = list(
outcome =
colonization_crispatus_mg |>
filter(!is.na(.sample)) |>
select(.sample, crispatus_dominance_at_mg, crispatus_dominance_by_mg) |>
as.data.frame() |>
column_to_rownames(".sample") |> t()
)
)
se_primary_col_LBP_qPCR <-
SummarizedExperiment(
assays = list(
outcome =
colonization_qPCR |>
filter(!is.na(.sample)) |>
select(.sample, detected_at_qpcr, detected_by_qpcr) |>
as.data.frame() |>
column_to_rownames(".sample") |> t()
)
)
mae <- c(mae, list(col_LBP_mg = se_primary_col_LBP_mg,
col_crispatus_mg = se_primary_col_crisp_mg,
col_LBP_qPCR = se_primary_col_LBP_qPCR))MultiAssayExperiment objectsWe save the “master” MAE (with all assays); but if needed for publication, we can subset the MAE to only include the assays/data of interest.
saveRDS(
mae,
str_c(
get_data_dir(data_source = data_source),
"03 QCed MAEs/",
"mae_full_", today() |> str_remove_all("-"), ".rds"
)
)